suppressPackageStartupMessages({
  library(GenomicRanges)
  library(epiwraps)
  library(ggplot2)
  library(rGREAT)
  library(AnnotationHub)
  library(ensembldb)
  library(bsseq)
  library(BiocParallel)
  library(edgeR)
  library(DMRcate)
  library(rtracklayer)
  library(sechm)
  library(pheatmap)
  library(viridis)
  library(data.table)
})

set.seed(40)

Load the data

Download:

options(timeout = 6000)
download.file("https://ethz-ins.org/content/w11_practical.zip", "w11_practical.zip")
dir.create("./w11_practical")
## Warning in dir.create("./w11_practical"): './w11_practical' already exists
unzip("w11_practical.zip", exdir="./w11_practical")

The .bigwig files have already been reduced to chromosome one and only have to be loaded here:

tracksGr <- list("ATAC"="./w11_practical/ATAC.rds",
                 "H3K27ac"="./w11_practical/H3K27ac.rds",
                 "H3K4me3"="./w11_practical/H3K4me3.rds",
                 "DNAme"="./w11_practical/DNAm.rds")
tracksGr <- lapply(tracksGr, readRDS)

Load the Annotation data

ah <- AnnotationHub()
ensdb <- ah[["AH89211"]] # GRCm38
## loading from cache

Obtaining the promoter coordinates of chromosome 1:

chr1 <-  GRanges(seqnames=Rle(c("1")), 
                          ranges = IRanges(1, end=195471971))

# We define promoters as the regions +/- 200 of the TSS
tssMargin <- 200
promoterRegions <- promoters(ensdb, upstream=tssMargin, downstream=tssMargin,
                             filter=GRangesFilter(chr1))

gene body coordinates:

geneBodies <- genes(ensdb, columns=c("gene_seq_start", "gene_seq_end"),
                    filter=GRangesFilter(chr1))

Enriched Heatmaps

Promoters

promoterRegions <- promoterRegions[1:2000]
seqlevelsStyle(promoterRegions) <- "UCSC"
smp <- signal2Matrix(tracksGr, promoterRegions, 
                       extend=1000, w=20, 
                       type="center",
                       smooth=TRUE)
## Computing signal from GRanges 'ATAC'...
## Computing signal from GRanges 'H3K27ac'...
## Computing signal from GRanges 'H3K4me3'...
## Computing signal from GRanges 'DNAme'...
plotEnrichedHeatmaps(smp, 
                     axis_name="TSS",
                     multiScale=TRUE,
                     use_raster=TRUE)

Clustering

cl <- clusterSignalMatrices(assays(smp)$input[,"DNAme", drop=FALSE], k=2)
##   ~62% of the variance explained by clusters
table(cl)
## cl
##    1    2 
## 1285  715
mycolors <- c("1"="#E69F00", "2"="#56B4E9") # row_split=cl, mean_color=mycolors
plotEnrichedHeatmaps(smp, 
                     axis_name = c("TSS"), 
                     row_split=cl,
                     scale_title="signal",
                     mean_color=mycolors,
                     multiScale=TRUE,
                     use_raster=TRUE)

For the colors see: Colorblind Color Palette (Discrete) and Scales

TF-Bindings

tracksDNAm <- readRDS("./w11_practical_extra/tracksDNAm_hb.rds")
bindingSites <- readRDS("./w11_practical_extra/ctcf_binding_sites_hb.rds")

smTfbs <- signal2Matrix(list("DNAm"=tracksDNAm), 
                        bindingSites, 
                        extend=1000, w=20, 
                        type="scale", smooth=TRUE)
## Computing signal from GRanges 'DNAm'...
plotEnrichedHeatmaps(smTfbs, 
                     axis_name = c("peak_start", "peak_end"),
                     use_raster=TRUE)

Gene bodies

se <- readRDS("./w11_practical_extra/EnrichmentSE.rds")
plotEnrichedHeatmaps(se, multiScale=TRUE)

Differential Methylation Testing

Bsseq object

The Bisulfite-sequenncing (BS-seq) data we are looking is from the bsseqData package. It contains colon cancer samples with 3 patients with each a colon cancer and normal colon sample. Here we only look at chromosome 22.

library(bsseq)
bs <- readRDS("./w11_practical/bs.rds")

rowRanges(bs)
## GRanges object with 578097 ranges and 0 metadata columns:
##            seqnames    ranges strand
##               <Rle> <IRanges>  <Rle>
##        [1]    chr22  16050097      *
##        [2]    chr22  16050114      *
##        [3]    chr22  16050174      *
##        [4]    chr22  16050206      *
##        [5]    chr22  16050213      *
##        ...      ...       ...    ...
##   [578093]    chr22  51244314      *
##   [578094]    chr22  51244503      *
##   [578095]    chr22  51244509      *
##   [578096]    chr22  51244532      *
##   [578097]    chr22  51244561      *
##   -------
##   seqinfo: 2 sequences from an unspecified genome; no seqlengths
pData(bs)
## DataFrame with 6 rows and 2 columns
##           Type        Pair
##    <character> <character>
## C1      cancer       pair1
## C2      cancer       pair2
## C3      cancer       pair3
## N1      normal       pair1
## N2      normal       pair2
## N3      normal       pair3

Testing

Get annotations (hs):

# genes
ensdb <- ah[["AH109336"]]
## loading from cache
chr22 <-  GRanges(seqnames=Rle(c("22")), 
                  ranges = IRanges(1, end=50818468))
genesChr22 <- genes(ensdb, columns=c("gene_seq_start", "gene_seq_end", "gene_name"),
                    filter=GRangesFilter(chr22))
seqlevelsStyle(genesChr22) <- "UCSC"

# promoters
tssMargin <- 200
promotersChr22 <- promoters(ensdb, upstream=tssMargin, downstream=tssMargin,
                             filter=GRangesFilter(chr22), columns=c("gene_name"))
seqlevelsStyle(promotersChr22) <- "UCSC"

Retrieve metyhlation levels and visualize:

metPr <- bsseq::getMeth(bs, 
                        regions=promotersChr22[1:100], 
                        what="perRegion")
colnames(metPr) <- colnames(bs)
rownames(metPr) <- promotersChr22$gene_name[1:100]
metPr <- metPr[!is.na(rowSums(metPr)),]

library(viridis)
library(pheatmap)

annotationCol <- as.data.frame(pData(bs))
rownames(annotationCol) <- colnames(metPr)
pheatmap::pheatmap(metPr, 
                   cluster_rows=TRUE,
                   cluster_cols=FALSE,
                   annotation_col=annotationCol,
                   show_rownames = TRUE,
                   color=rocket(10))

Differential methylation testing:

# design matrix
pData(bs)$Type <- relevel(as.factor(pData(bs)$Type), ref="normal")
design <- model.matrix(~Type+Pair, data=pData(bs)) 

# adapt for methylation data
methdesign <- modelMatrixMeth(design)
seqAnnot <- sequencing.annotate(bs, methdesign, 
                                all.cov=TRUE, 
                                coef="Typecancer")
## Filtering out all CpGs where at least one sample has zero coverage...
## Processing BSseq object...
## Transforming counts...
## Fitting model...
## Your contrast returned no individually significant CpGs. Consider increasing the 'fdr' parameter using changeFDR(), but be warned there is an increased risk of Type I errors.
dmrcateRes <- dmrcate(seqAnnot, 
                      C=2, 
                      min.cpgs=5,
                      pcutoff=0.05) #caution!
## Fitting chr22...
## Demarcating regions...
## Done!
dmrRanges <- extractRanges(dmrcateRes, genome="hg38")
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
saveRDS(dmrRanges, "./w11_practical/dmr.rds")

dmrRanges <- dmrRanges[order(abs(dmrRanges$meandiff), decreasing=TRUE)]
DMR.plot(dmrRanges, dmr=1, phen.col=c(rep(mycolors[1], 3),
                                      rep(mycolors[2], 3)), 
         group.means=TRUE,
         CpGs=bs, genome="hg38")
## Ensembl site unresponsive, trying useast mirror

dmrRangesGenes <- dmrRanges[!is.na(dmrRanges$overlapping.genes)]

Obtain the coordinates of the genes within DMRs.

# Get the genes within Differentially methylated regions
topIdx <- order(dmrRangesGenes$min_smoothed_fdr)[1:10]
genesDmr <- unlist(tstrsplit(dmrRangesGenes[topIdx]$overlapping.genes, split=", "))
genesDmr <- genesDmr[!is.na(genesDmr)]
dmrGenes <- genesChr22[genesChr22$gene_name %in% genesDmr]
dmrGenes
## GRanges object with 15 ranges and 2 metadata columns:
##                   seqnames            ranges strand |   gene_name
##                      <Rle>         <IRanges>  <Rle> | <character>
##   ENSG00000093009    chr22 19479457-19520612      + |       CDC45
##   ENSG00000243902    chr22 37339583-37427445      - |       ELFN2
##   ENSG00000166897    chr22 37367960-37427479      - |       ELFN2
##   ENSG00000176177    chr22 39743044-39893864      - |      ENTHD1
##   ENSG00000138964    chr22 44172956-44219533      + |       PARVG
##               ...      ...               ...    ... .         ...
##   ENSG00000077942    chr22 45502238-45601135      + |       FBLN1
##   ENSG00000130943    chr22 46255663-46263343      - |      PKDREJ
##   ENSG00000100416    chr22 46330875-46357340      + |        TRMU
##   ENSG00000075275    chr22 46360834-46537620      - |      CELSR1
##   ENSG00000205632    chr22 48866770-48898386      + |   LINC01310
##                           gene_id
##                       <character>
##   ENSG00000093009 ENSG00000093009
##   ENSG00000243902 ENSG00000243902
##   ENSG00000166897 ENSG00000166897
##   ENSG00000176177 ENSG00000176177
##   ENSG00000138964 ENSG00000138964
##               ...             ...
##   ENSG00000077942 ENSG00000077942
##   ENSG00000130943 ENSG00000130943
##   ENSG00000100416 ENSG00000100416
##   ENSG00000075275 ENSG00000075275
##   ENSG00000205632 ENSG00000205632
##   -------
##   seqinfo: 1 sequence from hg38 genome